##Reading Data
options(max.print=999999999)
options(warn=-1)
library(caret)
library(readstata13)
library(TSstudio)
library(caret)
library(ROCR)
library(pROC)
library(xgboost)
library(gbm)
library(dplyr)
library(gtsummary)
library(xlsx)
library(rJava)
library(ggplot2)
library(segmented)
library(rpart)
library(tidyverse)
library(modelr)
library(dotwhisker)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(broom)
library(lme4)
library(MuMIn)
library(AICcmodavg)
library(psycho)
library(performance)
library(pbkrtest)
library(estimability)
library(emmeans)
library(lsmeans)
library(ggiraph)
library(ggiraphExtra)
require(plyr)
data2<-read.dta13(file = "Y:/Haroon Janjua/Project_Lap_Open_Rob_FL AHCA_Charges/Data_iHernia/Hernia_18_Robotic_Inflation.dta")
# looking at the dimensions of the dataset
dim(data2)
## [1] 8604 24
# looking at any missing values in our dataset
sum(is.na(data2))
## [1] 0
# taking a look at the structure of the varaibles in the dataset
str(data2[1:24])
## 'data.frame': 8604 obs. of 24 variables:
## $ SYS_RECID : num 62533124 62612463 62532927 65090208 63313825 ...
## $ MCARE_NBR : chr "100006" "100006" "100006" "100006" ...
## $ Tot_Open : num 202 202 202 202 202 202 202 202 202 202 ...
## $ Tot_Lap : num 66 66 66 66 66 66 66 66 66 66 ...
## $ Tot_Rob : num 11 11 11 11 11 11 11 11 11 11 ...
## $ Tot_Hernia : num 279 279 279 279 279 279 279 279 279 279 ...
## $ TCHGS : num 46136 26501 25429 42431 34879 ...
## $ OPRMCHGS : num 17676 13616 10716 18775 10281 ...
## $ Non_OPR_CHGS : num 28460 12885 14713 23656 24598 ...
## $ COST : num 7213 4143 3976 6634 5453 ...
## $ Cost_OPR : num 2764 2129 1675 2935 1607 ...
## $ COST_Real : num 7663 4402 4224 7048 5793 ...
## $ COST_OPR_Real : num 2936 2262 1780 3118 1708 ...
## $ Non_OPR_Cost : num 4450 2015 2300 3699 3846 ...
## $ CCRATIO : num 0.156 0.156 0.156 0.156 0.156 ...
## $ OPR_PC : num 0.383 0.514 0.421 0.442 0.295 ...
## $ OPR_PC_CHG : num 0.383 0.514 0.421 0.442 0.295 ...
## $ TOTAL_HOSPITAL_BEDS: int 1454 1454 1454 1454 1454 1454 1454 1454 1454 1454 ...
## $ VISITS : int 1222 4755 790 1373 778 4987 4755 1362 778 1373 ...
## $ TIME : num 3 3 3 3 3 3 3 3 3 3 ...
## $ typeofproc2 : chr "OPEN" "OPEN" "OPEN" "OPEN" ...
## $ typeofproc : num 1 1 1 1 1 2 1 1 1 1 ...
## $ YEAR : num 2017 2017 2017 2017 2017 ...
## $ QTR : int 1 1 1 4 2 2 1 2 2 4 ...
data2[c(3:19)] <- lapply(data2[,c(3:19)] , as.numeric)
data2$typeofproc2 <- as.factor(data2$typeofproc2)
data2$typeofproc <- as.factor(data2$typeofproc)
# taking a look at the structure of the varaibles in the dataset
str(data2[1:24])
## 'data.frame': 8604 obs. of 24 variables:
## $ SYS_RECID : num 62533124 62612463 62532927 65090208 63313825 ...
## $ MCARE_NBR : chr "100006" "100006" "100006" "100006" ...
## $ Tot_Open : num 202 202 202 202 202 202 202 202 202 202 ...
## $ Tot_Lap : num 66 66 66 66 66 66 66 66 66 66 ...
## $ Tot_Rob : num 11 11 11 11 11 11 11 11 11 11 ...
## $ Tot_Hernia : num 279 279 279 279 279 279 279 279 279 279 ...
## $ TCHGS : num 46136 26501 25429 42431 34879 ...
## $ OPRMCHGS : num 17676 13616 10716 18775 10281 ...
## $ Non_OPR_CHGS : num 28460 12885 14713 23656 24598 ...
## $ COST : num 7213 4143 3976 6634 5453 ...
## $ Cost_OPR : num 2764 2129 1675 2935 1607 ...
## $ COST_Real : num 7663 4402 4224 7048 5793 ...
## $ COST_OPR_Real : num 2936 2262 1780 3118 1708 ...
## $ Non_OPR_Cost : num 4450 2015 2300 3699 3846 ...
## $ CCRATIO : num 0.156 0.156 0.156 0.156 0.156 ...
## $ OPR_PC : num 0.383 0.514 0.421 0.442 0.295 ...
## $ OPR_PC_CHG : num 0.383 0.514 0.421 0.442 0.295 ...
## $ TOTAL_HOSPITAL_BEDS: num 1454 1454 1454 1454 1454 ...
## $ VISITS : num 1222 4755 790 1373 778 ...
## $ TIME : num 3 3 3 3 3 3 3 3 3 3 ...
## $ typeofproc2 : Factor w/ 3 levels "LAP","OPEN","ROBOTIC": 2 2 2 2 2 1 2 2 2 2 ...
## $ typeofproc : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 2 1 1 1 1 ...
## $ YEAR : num 2017 2017 2017 2017 2017 ...
## $ QTR : int 1 1 1 4 2 2 1 2 2 4 ...
#Linear Regression Models
# fit LM for Open
set.seed(5842)
model.open <- lm(COST_OPR_Real ~ TIME , data = data2, subset = typeofproc2 == "OPEN")
summary(model.open)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data2, subset = typeofproc2 ==
## "OPEN")
##
## Residuals:
## Min 1Q Median 3Q Max
## -1726.0 -599.7 -183.6 404.2 10188.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1975.91 32.56 60.679 < 2e-16 ***
## TIME 66.89 9.34 7.162 9.48e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 868.7 on 3913 degrees of freedom
## Multiple R-squared: 0.01294, Adjusted R-squared: 0.01269
## F-statistic: 51.29 on 1 and 3913 DF, p-value: 9.478e-13
# fit LM for Lap
set.seed(5842)
model.lap <- lm(COST_OPR_Real ~ TIME , data = data2, subset = typeofproc2 == "LAP")
summary(model.lap)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data2, subset = typeofproc2 ==
## "LAP")
##
## Residuals:
## Min 1Q Median 3Q Max
## -2513.7 -954.1 -229.4 451.9 11775.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2447.07 76.70 31.905 < 2e-16 ***
## TIME 135.41 19.87 6.813 1.3e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1346 on 1784 degrees of freedom
## Multiple R-squared: 0.02536, Adjusted R-squared: 0.02481
## F-statistic: 46.42 on 1 and 1784 DF, p-value: 1.303e-11
# fit LM for Rob
set.seed(5842)
model.rob <- lm(COST_OPR_Real ~ TIME , data = data2, subset = typeofproc2 == "ROBOTIC")
summary(model.rob)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data2, subset = typeofproc2 ==
## "ROBOTIC")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3609.9 -1283.8 -429.8 1115.8 12683.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3469.76 115.97 29.92 <2e-16 ***
## TIME 332.29 27.65 12.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1923 on 2901 degrees of freedom
## Multiple R-squared: 0.04742, Adjusted R-squared: 0.04709
## F-statistic: 144.4 on 1 and 2901 DF, p-value: < 2.2e-16
model.open$coefficients
## (Intercept) TIME
## 1975.91187 66.89208
model.lap$coefficients
## (Intercept) TIME
## 2447.0712 135.4059
model.rob$coefficients
## (Intercept) TIME
## 3469.7595 332.2948
#Analysis of Variance
set.seed(5842)
m.interaction <- lm(COST_OPR_Real ~ TIME*typeofproc2, data = data2)
anova(m.interaction)
## Analysis of Variance Table
##
## Response: COST_OPR_Real
## Df Sum Sq Mean Sq F value Pr(>F)
## TIME 1 2.1602e+09 2160171078 1098.450 < 2.2e-16 ***
## typeofproc2 2 9.8216e+09 4910779486 2497.139 < 2.2e-16 ***
## TIME:typeofproc2 2 2.2089e+08 110443997 56.161 < 2.2e-16 ***
## Residuals 8598 1.6909e+10 1966562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m.lst <- lstrends(m.interaction, "typeofproc2", var="TIME") ; m.lst
## typeofproc2 TIME.trend SE df lower.CL upper.CL
## LAP 135.4 20.7 8598 94.8 176.0
## OPEN 66.9 15.1 8598 37.3 96.4
## ROBOTIC 332.3 20.2 8598 292.8 371.8
##
## Confidence level used: 0.95
pairs(m.lst)
## contrast estimate SE df t.ratio p.value
## LAP - OPEN 68.5 25.6 8598 2.675 0.0205
## LAP - ROBOTIC -196.9 28.9 8598 -6.811 <.0001
## OPEN - ROBOTIC -265.4 25.2 8598 -10.540 <.0001
##
## P value adjustment: tukey method for comparing a family of 3 estimates
#Interactive Linear Model Plots
ggPredict(model.open,se=TRUE,interactive=TRUE)
ggPredict(model.lap,se=TRUE,interactive=TRUE)
ggPredict(model.rob,se=TRUE,interactive=TRUE)
#Non Interactive Combined Linear Model Plots
data2 %>%
ggplot(aes(x=TIME,
y=COST_OPR_Real,
color=typeofproc2))+
geom_point()+
geom_smooth(method="lm")
data2 %>%
ggplot(aes(x=TIME,
y=COST_OPR_Real,
color=typeofproc2))+
geom_smooth(method="lm")
#Year by Year Linear Reg Models
data3<-read.dta13(file = "Y:/Haroon Janjua/Project_Lap_Open_Rob_FL AHCA_Charges/Data_iHernia/Hernia_18_Robotic_YEARLY_Inflation.dta")
# looking at the dimensions of the dataset
dim(data3)
## [1] 11266 15
# looking at any missing values in our dataset
sum(is.na(data3))
## [1] 45064
# taking a look at the structure of the varaibles in the dataset
str(data3[1:15])
## 'data.frame': 11266 obs. of 15 variables:
## $ SYS_RECID : num 65308574 64540880 62533219 64539131 63274186 ...
## $ COST : num 13124 10914 13713 7286 9474 ...
## $ Cost_OPR : num 7312 6531 7810 2809 4804 ...
## $ COST_Real : num 13942 11595 14569 7740 10065 ...
## $ COST_OPR_Real: num 7768 6939 8297 2984 5103 ...
## $ Non_OPR_Cost : num 5812 4383 5904 4477 4670 ...
## $ TIME : num 3 3 3 3 3 3 3 3 3 3 ...
## $ typeofproc2 : chr "ROBOTIC" "ROBOTIC" "ROBOTIC" "ROBOTIC" ...
## $ typeofproc : num 3 3 3 3 3 3 3 3 3 3 ...
## $ YEAR : num 2017 2017 2017 2017 2017 ...
## $ typeofproc3 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc4 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc5 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc6 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeproc2 : chr "ROB_15-20" "ROB_15-20" "ROB_15-20" "ROB_15-20" ...
data3[c(2:6)] <- lapply(data3[,c(2:6)] , as.numeric)
data3$typeofproc2 <- as.factor(data3$typeofproc2)
data3$typeofproc <- as.factor(data3$typeofproc)
data3$typeproc2 <- as.factor(data3$typeproc2)
# taking a look at the structure of the varaibles in the dataset
str(data3[1:15])
## 'data.frame': 11266 obs. of 15 variables:
## $ SYS_RECID : num 65308574 64540880 62533219 64539131 63274186 ...
## $ COST : num 13124 10914 13713 7286 9474 ...
## $ Cost_OPR : num 7312 6531 7810 2809 4804 ...
## $ COST_Real : num 13942 11595 14569 7740 10065 ...
## $ COST_OPR_Real: num 7768 6939 8297 2984 5103 ...
## $ Non_OPR_Cost : num 5812 4383 5904 4477 4670 ...
## $ TIME : num 3 3 3 3 3 3 3 3 3 3 ...
## $ typeofproc2 : Factor w/ 2 levels "","ROBOTIC": 2 2 2 2 2 2 2 2 2 2 ...
## $ typeofproc : Factor w/ 1 level "3": 1 1 1 1 1 1 1 1 1 1 ...
## $ YEAR : num 2017 2017 2017 2017 2017 ...
## $ typeofproc3 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc4 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc5 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeofproc6 : num NA NA NA NA NA NA NA NA NA NA ...
## $ typeproc2 : Factor w/ 5 levels "ROB_15-20","ROB_16-20",..: 1 1 1 1 1 1 1 1 1 1 ...
# fit LM for Rob 2015-2020
set.seed(5877)
model.rob <- lm(COST_OPR_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_15-20")
summary(model.rob)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data3, subset = typeproc2 ==
## "ROB_15-20")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3609.9 -1283.8 -429.8 1115.8 12683.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3469.76 115.97 29.92 <2e-16 ***
## TIME 332.29 27.65 12.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1923 on 2901 degrees of freedom
## Multiple R-squared: 0.04742, Adjusted R-squared: 0.04709
## F-statistic: 144.4 on 1 and 2901 DF, p-value: < 2.2e-16
# fit LM for Rob 2016-2020
set.seed(5877)
model.rob16 <- lm(COST_OPR_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_16-20")
summary(model.rob16)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data3, subset = typeproc2 ==
## "ROB_16-20")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3637.8 -1305.2 -453.2 1103.8 9762.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3189.66 123.33 25.86 <2e-16 ***
## TIME 393.89 29.09 13.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1892 on 2835 degrees of freedom
## Multiple R-squared: 0.06076, Adjusted R-squared: 0.06042
## F-statistic: 183.4 on 1 and 2835 DF, p-value: < 2.2e-16
# fit LM for Rob 2017-2020
set.seed(5877)
model.rob17 <- lm(COST_OPR_Real ~ TIME ,data = data3, subset = typeproc2 == "ROB_17-20")
summary(model.rob17)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data3, subset = typeproc2 ==
## "ROB_17-20")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3680.9 -1391.3 -416.4 1106.2 9661.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2793.15 174.13 16.04 <2e-16 ***
## TIME 476.77 38.89 12.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1924 on 2469 degrees of freedom
## Multiple R-squared: 0.05738, Adjusted R-squared: 0.05699
## F-statistic: 150.3 on 1 and 2469 DF, p-value: < 2.2e-16
# fit LM for Rob 2018-2020
set.seed(5877)
model.rob18 <- lm(COST_OPR_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_18-20")
summary(model.rob18)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data3, subset = typeproc2 ==
## "ROB_18-20")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3682.5 -1437.8 -375.7 1116.5 9660.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2783.75 322.39 8.635 < 2e-16 ***
## TIME 478.62 66.18 7.232 6.92e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1997 on 1851 degrees of freedom
## Multiple R-squared: 0.02748, Adjusted R-squared: 0.02696
## F-statistic: 52.31 on 1 and 1851 DF, p-value: 6.923e-13
# fit LM for Rob 2019-2020
set.seed(5877)
model.rob19 <- lm(COST_OPR_Real ~ TIME ,data = data3, subset = typeproc2 == "ROB_19-20")
summary(model.rob19)
##
## Call:
## lm(formula = COST_OPR_Real ~ TIME, data = data3, subset = typeproc2 ==
## "ROB_19-20")
##
## Residuals:
## Min 1Q Median 3Q Max
## -4132 -1374 -255 945 9211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1411.7 670.9 -2.104 0.0356 *
## TIME 1252.8 127.0 9.867 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1944 on 1200 degrees of freedom
## Multiple R-squared: 0.07505, Adjusted R-squared: 0.07427
## F-statistic: 97.36 on 1 and 1200 DF, p-value: < 2.2e-16
model.rob$coefficients
## (Intercept) TIME
## 3469.7595 332.2948
model.rob16$coefficients
## (Intercept) TIME
## 3189.6572 393.8887
model.rob17$coefficients
## (Intercept) TIME
## 2793.1545 476.7681
model.rob18$coefficients
## (Intercept) TIME
## 2783.7495 478.6156
model.rob19$coefficients
## (Intercept) TIME
## -1411.703 1252.761
#Analysis of Variance
m.interaction2 <- lm(COST_OPR_Real ~ TIME*typeproc2, data = data3)
anova(m.interaction2)
## Analysis of Variance Table
##
## Response: COST_OPR_Real
## Df Sum Sq Mean Sq F value Pr(>F)
## TIME 1 2.3282e+09 2328190528 625.1016 < 2.2e-16 ***
## typeproc2 4 1.5788e+07 3946985 1.0597 0.3747
## TIME:typeproc2 4 2.1189e+08 52973147 14.2229 1.394e-11 ***
## Residuals 11256 4.1923e+10 3724499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m.interaction2$coefficients
## (Intercept) TIME typeproc2ROB_16-20
## 3469.75952 332.29484 -280.10227
## typeproc2ROB_17-20 typeproc2ROB_18-20 typeproc2ROB_19-20
## -676.60497 -686.01007 -4881.46255
## TIME:typeproc2ROB_16-20 TIME:typeproc2ROB_17-20 TIME:typeproc2ROB_18-20
## 61.59384 144.47330 146.32080
## TIME:typeproc2ROB_19-20
## 920.46653
m.lst2 <- lstrends(m.interaction2, "typeproc2", var="TIME") ; m.lst2
## typeproc2 TIME.trend SE df lower.CL upper.CL
## ROB_15-20 332 27.8 11256 278 387
## ROB_16-20 394 29.7 11256 336 452
## ROB_17-20 477 39.0 11256 400 553
## ROB_18-20 479 64.0 11256 353 604
## ROB_19-20 1253 126.1 11256 1006 1500
##
## Confidence level used: 0.95
pairs(m.lst2)
## contrast estimate SE df t.ratio p.value
## (ROB_15-20) - (ROB_16-20) -61.59 40.6 11256 -1.516 0.5520
## (ROB_15-20) - (ROB_17-20) -144.47 47.9 11256 -3.017 0.0215
## (ROB_15-20) - (ROB_18-20) -146.32 69.7 11256 -2.099 0.2206
## (ROB_15-20) - (ROB_19-20) -920.47 129.1 11256 -7.130 <.0001
## (ROB_16-20) - (ROB_17-20) -82.88 49.0 11256 -1.691 0.4396
## (ROB_16-20) - (ROB_18-20) -84.73 70.5 11256 -1.202 0.7504
## (ROB_16-20) - (ROB_19-20) -858.87 129.5 11256 -6.631 <.0001
## (ROB_17-20) - (ROB_18-20) -1.85 74.9 11256 -0.025 1.0000
## (ROB_17-20) - (ROB_19-20) -775.99 132.0 11256 -5.880 <.0001
## (ROB_18-20) - (ROB_19-20) -774.15 141.4 11256 -5.476 <.0001
##
## P value adjustment: tukey method for comparing a family of 5 estimates
set.seed(5877)
data3 %>%
ggplot(aes(x=TIME,
y=COST_OPR_Real,
color=typeproc2))+
geom_point()+
geom_smooth(method="lm")
set.seed(5877)
data3 %>%
ggplot(aes(x=TIME,
y=COST_OPR_Real,
color=typeproc2))+
geom_smooth(method="lm")